<?php

    #-------------------------------------------------------------#
	# This file is part of the Tekuna MVC Microframework for PHP. #
	# (c) 2009 Niels Jäckel <niels.jaeckel@tekuna.org>            #
	# Please visit http://tekuna.org/ for more information.       #
	#                                                             #
	# This software is licensed under the MIT License. Please     #
	# view the LICENSE file for the full text of this license.    #
    #-------------------------------------------------------------#


	Tekuna :: load('org.tekuna.core.filter.TekunaFilter');
	Tekuna :: load('org.tekuna.core.filter.data.EntityDecodeFilter');


	/**
	 * This filter tries to remove all scripting constructs from the input
	 * data. It handles well-formed script markup as well as malformed
	 * markup that some browsers still execute because of parser bugs.
	 *
	 * This filter can be used as a sanitizing input filter to prevent XSS
	 * attacks while preserving the remaining (X)HTML.
	 */

	class NoScriptFilter implements TekunaFilter {


		/**
		 * Applies all implemented un-scripting patterns to the input.
		 *
		 * The input is entity-decoded once, then every removal rule is
		 * applied repeatedly until a full pass no longer changes the data.
		 * A single pass is not enough, because deleting one fragment can
		 * join its neighbours into a new scripting construct, e.g.
		 * <a onjavascript:click="..."> turns into <a onclick="..."> as
		 * soon as the "javascript:" link prefix has been removed.
		 *
		 * If the PCRE engine reports an error (for example an exceeded
		 * backtrack limit), the data cannot be sanitized reliably and an
		 * empty string is returned instead of partially filtered data.
		 *
		 * @param mixed $sData all input data
		 * @return string the input without the scripting things
		 */

		public function filter($sData) {

			// decode all HTML and XML entities (once, before the removal passes)
			$objEDF = new EntityDecodeFilter();
			$sData = $objEDF -> filter($sData);

			$arrCapturePatterns = $this -> getCapturePatterns();
			$arrPatterns = $this -> getReplacementPatterns();

			// every rule only deletes characters, so each pass that changes
			// the data also shortens it and the loop is guaranteed to end
			do {

				$sBefore = $sData;

				// targeted removals: only the captured fragment is deleted
				foreach ($arrCapturePatterns as $sPattern) {

					$sData = $this -> removeCapturedFragments($sPattern, $sData);

					if ($sData === null) {

						// fail closed: never hand out data that was not filtered
						return '';
					}
				}

				// perform replacements (the whole match is deleted)
				$sData = preg_replace($arrPatterns, '', $sData);

				if ($sData === null || preg_last_error() !== PREG_NO_ERROR) {

					// fail closed: never hand out data that was not filtered
					return '';
				}
			}
			while ($sData !== $sBefore);

			return $sData;
		}


		/**
		 * Repeatedly searches the data for the given pattern and deletes
		 * every occurrence of the text captured by its first group, until
		 * the pattern does not match any more.
		 *
		 * @param string $sPattern PCRE pattern with a mandatory first capture group
		 * @param mixed $sData the data to clean
		 * @return mixed the cleaned data or null if the PCRE engine reported an error
		 */

		private function removeCapturedFragments($sPattern, $sData) {

			$arrRes = array();

			while (preg_match($sPattern, $sData, $arrRes)) {

				// an empty fragment cannot be removed; stop instead of looping forever
				if ($arrRes[1] === '') {

					break;
				}

				$sData = str_replace($arrRes[1], '', $sData);
			}

			// preg_match() returns false on engine errors, which also ends the
			// loop above; report that case instead of passing the data on
			if (preg_last_error() !== PREG_NO_ERROR) {

				return null;
			}

			return $sData;
		}


		/**
		 * Returns the patterns whose first capture group marks the fragment
		 * that has to be deleted, in the order in which they are applied.
		 *
		 * @return array list of PCRE patterns
		 */

		private function getCapturePatterns() {

			return array(

				// remove event attributes (well-formed)
				// <a ... onclick="...">
				'~<[^>]*(on[a-z]+\s*=\s*(?:(?:"[^"]*")|(?:\'[^\']*\')))[^>]*>?~is',

				// remove scripts in styles areas (Mozilla: -moz-binding)
				// because of Firefox Bug #324253 (@Bugzilla)
				'~<\s*style[^>]*>?.*{.*(-moz-binding\s*:[^;]*;?)~is',

				// remove scripts in styles areas (Internet Explorer: expression)
				// top: expression(alert('xss'));
				'~<\s*style[^>]*>?.*{.*(expression\s*\([^\)]*\)*);?~is',

				// remove scripts in styles attributes (Mozilla: -moz-binding)
				// because of Firefox Bug #324253 (@Bugzilla)
				'~<[^>]*style\s*=.*(-moz-binding\s*:[^;]*;?)~is',

				// remove scripts in styles attributes (Internet Explorer: expression)
				// <p style="top:expression(alert('xss'));">XSS Test</p>
				'~<[^>]*style\s*=.*(expression\s*\([^\)]*\)*);?~is'
			);
		}


		/**
		 * Returns the patterns whose complete match has to be deleted,
		 * in the order in which they are applied.
		 *
		 * @return array list of PCRE patterns
		 */

		private function getReplacementPatterns() {

			$arrPatterns = array();

			// remove script areas
			$arrPatterns[] = '~<\s*script[^>]*>(?:.*<\s*/\s*script\s*>)?~is';


			// remove script areas like this:
			// <script src="http://badhost.com/xss.js" </script
			// because of Firefox bug #226495 (@Bugzilla)
			$arrPatterns[] = '~<\s*script[^>]*(?:.*<\s*/\s*script\s*)?~is';


			// remove script links
			// encoded script links will be removed too because of
			// the global entity decoding at the beginning of filter()
			$arrPatterns[] = '~j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:|' .		// javascript:
			                 'j\s*s\s*c\s*r\s*i\s*p\s*t\s*:|' .						// jscript:
			                 'l\s*i\s*v\s*e\s*s\s*c\s*r\s*i\s*p\s*t\s*:|' .			// livescript:
			                 'm\s*o\s*c\s*h\s*a\s*:|' .								// mocha:
			                 'd\s*a\s*t\s*a\s*:|' .									// data:
			                 'v\s*b\s*s\s*c\s*r\s*i\s*p\s*t\s*:~i';					// vbscript:


			// remove event attributes (not well-formed; destructive)
			// e.g. <a href="" onclick=alert('xss');
			$arrPatterns[] = '~<[^>]*on[a-z]+[^a-z0-9=]*=\s*[^"\'\s][^>]*>?~is';

			return $arrPatterns;
		}
	}
